'''
scrapy crawl dmoz -o items.json -t json
'''
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector

from tutorial.items import DmozItem

class DmozSpider(BaseSpider):
    """Spider that collects site entries from a DMOZ directory listing page.

    For every entry found under the ``site-list-content`` element of the
    start page, a ``DmozItem`` is populated with the entry's title, link
    and description.
    """

    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        # Alternative start page (disabled):
        # "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        """Build one ``DmozItem`` per site entry on the listing page.

        Args:
            response: The downloaded page; must expose ``xpath()``.

        Returns:
            A list of ``DmozItem`` objects whose ``title``, ``link`` and
            ``desc`` fields each hold a list of extracted strings.
        """
        items = []
        for site in response.xpath('//*[@id="site-list-content"]/div'):
            item = DmozItem()
            item['title'] = site.xpath('div[3]/a/div/text()').extract()
            item['link'] = site.xpath('div[3]/a/@href').extract()
            # extract() returns a list of strings, so surrounding whitespace
            # (CR, LF, tabs, spaces) has to be stripped fragment by fragment;
            # calling a string method on the list itself raises
            # AttributeError.
            item['desc'] = [
                text.strip()
                for text in site.xpath('div[3]/div/text()').extract()
            ]
            items.append(item)
        return items